home *** CD-ROM | disk | FTP | other *** search
- #include <ctype.h>
- #include <stdio.h>
- #include <sys/types.h>
- #include <sys/stat.h>
- #include "util.h"
- #include "combine.h"
- /*
- * main: Main program for the COMBINE utility
- *
- * This routine is the driver for the utility.
- *
- * Return value:
- * This procedure has no return value.
- */
-
- void main (argc, argv)
- int argc; /* command line argument count */
-
- char **argv; /* command line arguments */
-
- {
-
- struct stat stat_buf;/* Buf. to find last written date */
-
- /*
- * Execute program phases.
- */
-
- if (!isatty (fileno (stdout))) {
- fstat (fileno (stdout), &stat_buf);
- setvbuf (stdout, mem_alloc (stat_buf.st_blksize),
- _IOFBF, stat_buf.st_blksize);
- }
-
- init (argc, argv); /* Perform program initialization */
-
- if (p1_debug || pa_debug) {
- fputs ("Start Pass1\n", stderr);
- }
- pass1 (); /* Read files building symbol table and record arrays. */
- if (p1_debug || pa_debug) {
- dump_sym_tab ("Pass1 symbol table");
- dump_arrays ("Pass1 arrays");
- }
-
- if (p2_debug || pa_debug) {
- fputs ("Start Pass2\n", stderr);
- }
- pass2 (); /* Determine anchor points in files. */
- if (p2_debug || pa_debug) {
- dump_arrays ("Pass2 arrays");
- }
-
- if (p3_debug || pa_debug) {
- fputs ("Start Pass3\n", stderr);
- }
- pass3 (); /* Expand anchors to non-unique lines. */
- if (p3_debug || pa_debug) {
- dump_arrays ("Pass3 arrays");
- }
-
- if (p4_debug || pa_debug) {
- fputs ("Start Pass4\n", stderr);
- }
- pass4 (); /* Fix non-uniques surrounded by insertions */
- if (p4_debug || pa_debug) {
- dump_arrays ("Pass4 arrays");
- }
-
- if (p5_debug || pa_debug) {
- fputs ("Start Pass5\n", stderr);
- }
- pass5 (); /* Write output files. */
-
- if (statistics_flag) {
- dump_statistics ();
- }
-
- if (old_new1_change_count == 0 &&
- (file_count == 2 ||
- (old_new2_change_count == 0 &&
- new1_new2_change_count == 0))) {
- exit (0);
- } else {
- exit (1);
- }
-
- }
- /*
- * dump_arrays: dump arrays for debugging purposes
- *
- * This routine outputs the record arrays to the standard output file.
- *
- * Return value:
- * This procedure has no return value.
- */
-
- void dump_arrays (message)
- char *message; /* input */
- /* Message to print before arrays */
-
- {
-
- int i; /* Misc. variable */
-
- int index; /* Index into record array */
-
- int files_left; /* number of files left to do */
-
- bool file_done[MAX_FILE_COUNT];/* TRUE if EOT reached on file */
-
- record_type * record_ptr;/* Pointer to current record */
-
-
-
- /*
- * Initialize completion parameters.
- */
- printf ("%s\n", message);
-
- files_left = file_count;
- for (i = 0; i < file_count; ++i) {
- file_done[i] = FALSE;
- }
-
- /*
- * For each iteration of the file read the nth record of the each file.
- */
- for (index = BEGIN_INDEX + 1; files_left != 0; ++index) {
-
- printf ("record: %5d ", index);
-
- /*
- * Handle each file.
- */
-
- for (i = 0; i < file_count; ++i) {
-
- if (!file_done[i]) {
-
- if (index >= files[i].record_array_size - 1) {
- file_done[i] = TRUE;
- --files_left;
- if (files_left == 0) {
- break;
- }
- printf ("%38.38s", " ");
- continue;
- }
-
- record_ptr = &(files[i].record[index]);
-
- printf (" rfa:%6d val1:%6d val2:%6d",
- record_ptr -> rfa,
- record_ptr -> value[0],
- record_ptr -> value[1]);
-
- } else {
- printf ("%38.38s", " ");
-
- }
-
- }
-
- printf ("\n");
-
- }
-
- }
- /*
- * dump_statistics: Dump statistics
- *
- * This routine outputs the execution statistics * to the standard output
- * file.
- *
- * Return value:
- * This procedure has no return value.
- */
- void dump_statistics () {
-
- int i; /* Misc. variable */
-
-
-
- /*
- * Initialize completion parameters.
- */
-
- printf ("\fStatistics:\n\n");
-
- printf ("Cache misses: %d\n", cache_miss);
- printf ("Hash collisions: %d\n", hash_collisions);
-
- printf ("Line counts:\n");
- for (i = 0; i < file_count; ++i) {
- printf (" %5d: '%s'\n",
- files[i].record_array_size - DUMMY_RECORD_COUNT,
- files[i].name_ptr);
- }
-
- printf ("Changes:\n");
- printf (" '%s' and '%s' ", files[OLD_FILE].name_ptr,
- files[NEW1_FILE].name_ptr);
- if (old_new1_change_count == 0) {
- printf ("are identical.\n");
- } else {
- printf ("have %d differences.\n", old_new1_change_count);
- }
-
- if (file_count > 2) {
- printf (" '%s' and '%s' ", files[OLD_FILE].name_ptr,
- files[NEW2_FILE].name_ptr);
- if (old_new2_change_count == 0) {
- printf ("are identical.\n");
- } else {
- printf ("have %d differences.\n", old_new2_change_count);
- }
-
- printf (" '%s' and '%s' ", files[NEW1_FILE].name_ptr,
- files[NEW2_FILE].name_ptr);
- if (new1_new2_change_count == 0) {
- printf ("are identical.\n");
- } else {
- printf ("have %d differences.\n", new1_new2_change_count);
- }
- }
-
- }
- /*
- * dump_sym_tab: dump symbol table for debugging purposes
- *
- * This routine outputs the symbol table to the standard output file.
- *
- * Return value:
- * This procedure has no return value.
- */
- void dump_sym_tab (message)
- char *message; /* input */
- /* Message to print before table */
-
- {
-
- int i; /* Misc. variable */
-
-
-
- /*
- * Write each used symbol table entry.
- */
-
- printf ("%s\n", message);
-
- for (i = 0; i < sym_tab_size; ++i) {
- if (sym_tab_cache_ptr[i] != CACHE_FREE_ENTRY) {
-
- printf ("hash:%5d old:%5d new1:%5d ", i,
- files[OLD_FILE].sym_tab_index[i],
- files[NEW1_FILE].sym_tab_index[i]);
-
- if (file_count == 3) {
- printf ("new2:%5d ", files[NEW2_FILE].sym_tab_index[i]);
- }
-
- if (sym_tab_cache_ptr[i] ==
- (cache_entry_type *) CACHE_NOT_IN_CACHE) {
-
- printf ("(record not in cache)");
- } else {
- if (sym_tab_cache_ptr[i] -> hash_code != i) {
- printf ("(cache_hash_code wrong: %d)",
- sym_tab_cache_ptr[i]->hash_code);
- }
- if (sym_tab_cache_ptr[i] -> record_length < 0) {
- sym_tab_cache_ptr[i] ->recordp[0] = '\0';
- } else {
- sym_tab_cache_ptr[i] ->
- recordp[sym_tab_cache_ptr[i] ->
- record_length] = '\0';
- }
- printf ("cache_record(%d): %s",
- sym_tab_cache_ptr[i] -> record_length,
- sym_tab_cache_ptr[i] -> recordp);
- }
-
- printf ("\n");
-
- }
- }
-
- }
-
/*
 * print_usage: Print program usage and exit with the given status
 *
 * The synopsis and the option summary are written to stderr as a single
 * string, after which the program terminates; this routine does not
 * return to its caller.
 */
void print_usage (status)
int status;                     /* exit value */
{
    char *usage_text;           /* the complete usage message */

    usage_text = "\
Usage: combine [-BbHhqs] [-c #,#] [-d flag] [-L #] [-P #] [-p #]\n\
[-1 text] [-2 text] old_file new1_file [new2_file]\n\
Options:\n\
-H Help option -- print this message and exit.\n\
-b Blank compress option -- treat all whitespace as a single space.\n\
-B Blank remove option -- ignore all whitespace.\n\
-c #,# Column specification option -- specify column range to compare.\n\
-d flag Debug options -- specifies how much debug information is to be\n\
output (<flag> should be one of [1-5] to debug pass #n or\n\
`a' to debug all passes).\n\
-h h option -- produces a composite file on standard output\n\
suitable for input into combine2.\n\
-L # Lines option -- specify the number of lines to print on a page\n\
of output. Specifying a length of zero disables pagination\n\
(default page-length is 66 lines).\n\
-P # Prefix option -- specify the number of unchanged lines to output\n\
prior to any group of changed lines (default is 5 lines).\n\
-p # Postfix option -- specify the number of unchanged lines to output\n\
following any group of changed lines (default is 5 lines).\n\
-q Quiet option -- no output is generated if no changes are detected.\n\
-s Statistics option -- print statistics after the comparison.\n\
-1 text New1 file description -- symbolic description of <new1_file>.\n\
-2 text New2 file description -- symbolic description of <new2_file>.\n\
";

    fputs (usage_text, stderr);

    exit (status);
}
-
- /*
- * init: Perform program initialization.
- *
- *
- * This routine interprets the command line and opens the files.
- *
- * Return value:
- * This procedure has no return value.
- */
-
- void init (argc, argv)
- int argc; /* argument count from 'main' */
-
- char **argv; /* arguments from 'main' */
-
-
- {
-
- char *basename_ptr = 0;/* basename of files */
-
- int cache_entry_size;/* Number of bytes in a cache entry */
-
- cache_entry_type * cache_ptr;/* Pointer to cache entry */
-
- int different_basenames = 0;
- /* TRUE if file basenames are different */
-
- int directory_count = 0;
- /* number of command line arguments which are
- actually directories */
-
- FILE * dummy_file; /* can't assign to stdin on UNIX */
-
- long etime; /* Current time of day */
-
- int is_directory[MAX_FILE_COUNT]; /* TRUE if file is a directory */
-
- int i; /* Misc. variable */
- int j; /* Misc. variable */
- int k; /* Misc. variable */
-
- int max_record_len = LINE_LENGTH; /* max initial record length */
-
- int record_count; /* Number of records in record array */
-
- struct stat stat_buf;/* Buf. to find last written date */
-
- char *the_cache; /* Ptr to head of cache */
-
- char *temp_ptr; /* Misc char ptr */
-
- int total_record_count;/* Total number of records in all files */
-
- int c; /* Option character */
-
- extern int optind; /* Option index */
-
- extern char *optarg; /* Option argument pointer */
-
- extern int getopt ();/* getopt routine */
-
- extern char *ctime ();/* convert time routine */
-
- extern char *strrchr ();/* search for character in string */
-
-
- #ifdef VOS
- stdout -> carriage_control = TRUE;
- #endif
-
- /*
- * Scan options arguments.
- */
- (void) time (&etime);
- (void) strcpy (exec_time, ctime (&etime));
- exec_time[strlen (exec_time) - 1] = '\0'; /* remove newline character */
-
- for (;;) {
-
- c = getopt (argc, argv, "HbBhsqc:d:p:P:1:2:L:");
- if (c == EOF) {
- break;
- }
-
- switch (c) {
-
- /*
- * H option - print usage and exit
- */
- case 'H':
- print_usage(0);
- break;
-
- /*
- * B and b option: Blank remove and blank compress
- * options.
- */
- case 'b':
- blank_compress = TRUE;
- compress_records = TRUE;
- break;
-
- case 'B':
- blank_remove = TRUE;
- compress_records = TRUE;
- break;
- /*
- * c option: Compare only specified columns.
- */
-
- case 'c':
- compress_records = TRUE;
- if ((column_count + 1) == (MAX_COLUMNS)) {
- error ("Too many -c options");
- }
-
- for (j = 0; isdigit (optarg[j]); ++j) {
- }
- if (j == 0) {
- error ("-c option not followed by number");
- }
- first_column[column_count] = atoi (optarg) - 1;
- /* Zero relative */
-
- if (first_column[column_count] < 0) {
- error ("Column specification less than column 1");
- }
-
- if (optarg[j] != ',') {
- error ("Column specifications not separated by comma");
- }
-
- optarg += j + 1;
-
- for (j = 0; isdigit (optarg[j]); ++j) {
- }
- if (j == 0) {
- error ("-c option not followed by two numbers");
- }
- last_column[column_count] = atoi (optarg) - 1;
- /* Zero relative */
- if (last_column[column_count] < first_column[column_count]) {
- error ("Last column spec. less then first column spec.");
- }
-
- max_record_len = max(max_record_len,
- last_column[column_count] + 1);
-
- column_count++;
- break;
-
- /*
- * D option: Debug. Print debug output.
- */
- case 'd':
- switch (*optarg) {
- case 'a':
- pa_debug = TRUE;
- break;
- case '1':
- p1_debug = TRUE;
- break;
- case '2':
- p2_debug = TRUE;
- break;
- case '3':
- p3_debug = TRUE;
- break;
- case '4':
- p4_debug = TRUE;
- break;
- case '5':
- p5_debug = TRUE;
- break;
- default:
- error ("invalid argument following -d option");
- }
- break;
-
- /*
- * h option: name of file to output HED edit file to
- */
- case 'h':
- #ifdef VOS
- stdout -> carriage_control = FALSE;
- #endif
- hed_flag = TRUE;
- break;
-
- /*
- * -P option: Number of prefix lines to output to listing file.
- * -p option: Number of postfix lines to output to listing file.
- */
- case 'P':
- prefix_lines = atoi (optarg);
- if (prefix_lines > CACHE_ENTRIES - 10) {
- error ("Too many prefix lines");
- }
- break;
-
- case 'p':
- postfix_lines = atoi (optarg);
- break;
-
- /*
- * -s option: Output page of statistics to stdout
- */
- case 's':
- statistics_flag = TRUE;
- break;
-
- /*
- * -1 option: Text string to associate with 'new1' file.
- * -2 option: Text string to associate with 'new2' file.
- */
- case '1':
- files[NEW1_FILE].text_ptr = optarg;
- break;
-
- case '2':
- files[NEW2_FILE].text_ptr = optarg;
- break;
-
- /*
- * Q option: Quiet. Produce no output if no differences.
- */
- case 'q':
- quiet_option = TRUE;
- break;
-
- /*
- * L option: specify #lines/page for output listing
- */
- case 'L':
- page_length = atoi (optarg);
- if ( page_length < 0 )
- page_length = PAGE_LENGTH;
- if ( page_length && (page_length - HEAD_LENGTH) < 0 )
- page_length += HEAD_LENGTH;
- break;
-
- default:
- print_usage(2);
- break;
- }
-
- }
-
- /*
- * Handle each command line argument.
- */
- for (i = optind; i < argc; ++i) {
- #ifdef VOS
- /*
- * Handle redirections of 'stdin':
- *
- * This code won't get executed on a UNIX O.S. However,
- * on VOS this code allows the same syntax to work.
- */
- if (argv[i][0] == '<' && argv[i][1] != '\0') {
- dummy_file = freopen (&argv[i][1], "r", stdin);
-
- if (dummy_file == 0) {
- perror(&argv[i][1]);
- exit( 2 ) ;
- }
-
- /*
- * Handle redirections of 'stdout':
- *
- * This code won't get executed on a UNIX O.S. However, on VOS this
- * code allows the same syntax to work.
- */
- } else if (argv[i][0] == '>' && argv[i][1] != '\0') {
- dummy_file = freopen (&argv[i][1], "w", stdout);
-
- if (dummy_file == 0) {
- perror(&argv[i][1]);
- exit(2);
- }
-
- /*
- * Handle file arguments not preceeded by a specific option argument.
- */
- } else {
- #endif
- if (file_count >= MAX_FILE_COUNT) {
- error ("Too many files specified");
- }
-
- files[file_count].name_ptr = argv[i];
-
- stat (files[file_count].name_ptr, &stat_buf);
- is_directory[file_count] =
- (stat_buf.st_mode & S_IFMT) == S_IFDIR;
- if (is_directory[file_count]) {
- directory_count++;
- } else {
- temp_ptr = strrchr (argv[i], '/');
- if (temp_ptr == 0) {
- temp_ptr = argv[i];
- }
- if (basename_ptr &&
- strcmp (temp_ptr, basename_ptr) != 0) {
- different_basenames = 1;
- }
- basename_ptr = temp_ptr;
- }
-
- file_count++;
- #ifdef VOS
- }
- #endif
- }
-
- /*
- * Resolve actual file names and open files.
- *
- * The name specified on the command line might be a directory name.
- */
-
- if (file_count < 2) {
- error ("not enough files specified");
- }
- if (file_count == directory_count) {
- error ("cannot compare directories");
- }
- if (directory_count != 0 &&
- file_count - directory_count > 1 &&
- different_basenames) {
- error ("ambiguous directory name");
- }
-
- total_record_count = 0;
- for (i = 0; i < file_count; ++i) {
-
- if (is_directory[i]) {
- temp_ptr = mem_alloc (strlen (files[i].name_ptr) +
- strlen (basename_ptr) + 2);
- sprintf (temp_ptr, "%s/%s", files[i].name_ptr,
- basename_ptr);
- files[i].name_ptr = temp_ptr;
- }
-
- #ifdef VOS
- files[i].seq_fd =
- fopen (files[i].name_ptr, "r", max_record_len, "s", $OPEN_DB);
- files[i].rnd_fd =
- fopen (files[i].name_ptr, "r", max_record_len, "s", $OPEN_RMAI);
- #else
- files[i].seq_fd = fopen (files[i].name_ptr, "r");
- files[i].rnd_fd = fopen (files[i].name_ptr, "r");
- #endif
-
- if (files[i].seq_fd == 0 || files[i].rnd_fd == 0) {
- perror(files[i].name_ptr);
- exit(2);
- }
-
- fstat (fileno (files[i].seq_fd), &stat_buf);
-
- temp_ptr = ctime (&(stat_buf.st_mtime));
- temp_ptr[strlen (temp_ptr) - 1] = '\0';
- files[i].lw_ptr = mem_alloc (strlen (temp_ptr) + 1);
- strcpy (files[i].lw_ptr, temp_ptr);
-
- setvbuf (files[i].seq_fd, mem_alloc (stat_buf.st_blksize),
- _IOFBF, stat_buf.st_blksize);
- setvbuf (files[i].rnd_fd, mem_alloc (stat_buf.st_blksize),
- _IOFBF, stat_buf.st_blksize);
-
- /* estimate record count by assuming 20 chars per record */
- /* Don't allow overly small record counts */
- record_count = max( stat_buf.st_size / 20, RA_ORIG);
- files[i].record_array_alloc = record_count;
- total_record_count += record_count;
-
- files[i].record = (record_type *)
- mem_alloc (record_count * sizeof (record_type));
-
- }
-
- /*
- * Sort column ranges into ascending order.
- */
- for (i = 0; i + 1 < column_count; ++i) {
- for (j = i + 1; j < column_count; ++j) {
- if (first_column[i] > first_column[j]) {
- k = first_column[i];
- first_column[i] = first_column[j];
- first_column[j] = k;
- k = last_column[i];
- last_column[i] = last_column[j];
- last_column[j] = k;
- }
- }
- }
-
- /*
- * Ensure there are no overlapping column ranges.
- */
- for (i = 0; i + 1 < column_count; ++i) {
- if (last_column[i] >= first_column[i + 1]) {
- error ("overlaping column ranges specified");
- }
- }
-
- /*
- * Allocate cache entries.
- *
- * Cache entries include an extra word at the end of the buffer.
- * This word allows a word of blanks to be inserted after the end
- * of each read line. This, in turn, allows hash code computations
- * and line comparisons to be word oriented rather than byte oriented.
- *
- * The cache is allocated in one chunk below for two reasons:
- * 1) For small files the huge number of allocations consumes
- * significant time.
- * 2) Less memory is used since mem_alloc allocates a block
- * which is larger than is actually requested. (The next larger
- * power of two.)
- */
- cache_entry_size =
- sizeof (cache_entry_type) + sizeof (int) + max_record_len;
- cache_entry_size += sizeof (int) - (cache_entry_size % sizeof (int));
- the_cache = mem_alloc (CACHE_ENTRIES * cache_entry_size);
- for (i = 0; i < CACHE_ENTRIES; ++i) {
- cache_ptr = (cache_entry_type *) the_cache;
- cache_ptr -> recordp = the_cache + sizeof(cache_entry_type);
- cache_ptr -> record_alen = cache_entry_size -
- sizeof(cache_entry_type);
- cache_ptr -> hash_code = HASH_FREE_ENTRY;
- enq_head_dll (cache_head_ptr, cache_tail_ptr, cache_ptr,
- cache_next_ptr, cache_prev_ptr);
- the_cache += cache_entry_size;
- }
-
- /*
- * Compute size of symbol table.
- *
- * 1) Initially quess size of symbol table as the sum of the number of
- * records in all of the input files times 2.
- * 2) Never allocate a symbol table of less than 1024 entries. (This step
- * is required due to the organization of the prime number table.)
- * 3) Round the size down to a multiple of 1024. (This tries to force the
- * symbol table to be an integer number of pages. It also limits the
- * size of the prime number table).
- * 4) Round the size down to a prime number. (The hashing algorithm requires*
- * that the size of the table is a prime number).
- */
- sym_tab_size = total_record_count * 2;
- sym_tab_size = max (1024, sym_tab_size);
-
- /* Prime number table contains only those primes which are less than
- and closest to a multiple of 1024 */
- for (i = 1; primes[i] != -1; ++i) {
- if (sym_tab_size < primes[i]) {
- break;
- }
- }
-
- sym_tab_size = primes[i - 1];
-
- /*
- * Allocate symbol table.
- */
- for (i = 0; i < file_count; ++i) {
- files[i].sym_tab_index = (int *) mem_alloc (sym_tab_size * sizeof (int));
- }
-
- sym_tab_cache_ptr = (cache_entry_type **)
- mem_alloc (sym_tab_size * sizeof (cache_entry_type *));
-
- }
- /*
- * link_records: link two records together.
- *
- * This routine links a record in the current file to a record in the
- * corresponding file.
- *
- * If either of these records are already
- * linked to a record in the other file, finish up all of the
- * linkages. Pass5 considers it an inconsistent state if only two of
- * the three linkages between files are made. Usually, this inconsistent
- * state will clear itself up. However, certain input files will indeed
- * allow the inconsistency to remain.
- *
- * Note: This routine also discovers an attempt to link records in an
- * impossible fashion. Suppose, this record in the 'current' file is
- * already linked to record A in the 'other' file. This record in the
- * 'corresponding' file is already linked to record B in the 'other' file.
- * Any attempt to link the current and corresponding records would
- * require that record A and record B be the same record (impossible).
- * In that circumstance, this routine acts as a no-op. The calling
- * routine is not informed since this new information wouldn't change the
- * decision making process which it is going through.
- *
- * Return value:
- * This procedure has no return value.
- */
- void link_records (match_no, index1, index2)
- int match_no; /* input */
- /* Which relationship is being scanned */
-
- int index1; /* Index into the current file of the record to
- link. */
-
- int index2; /* Index into the corresponding file of the
- record to link. */
-
- {
-
- file_type * file1_ptr; /* First file - current_file */
-
- file_type * file2_ptr; /* Second file - corresponding file */
-
- file_type * file3_ptr; /* Third file - other file */
-
- int file1_sub; /* For each record of the first file, this is a
- subscript of the 'value' array of the
- relationship between file1 and file2 */
-
- int file2_sub; /* For each record of the second file, this is
- a subscript of the 'value' array of the
- relationship between file2 and file1 */
-
- int file3_sub; /* For each record of the third file, this is a
- subscript of the 'value' array of the
- relationship between file3 and file1 */
-
- int hash_code; /* Hash code for the record being linked. */
-
- int index3; /* Index into record array of file3 is the
- 'next' record in file3 */
-
- int *other_val1_ptr; /* Pointer to the 'value' field in the record
- on file1. This is the 'value' which
- indicates the relationship to file3. */
-
- int *other_val2_ptr; /* Pointer to the 'value' field in the record
- on file2. This is the 'value' which
- indicates the relationship to file3. */
-
- int *val1_ptr; /* Pointer to the 'value' field in record on
- file1. This is the 'value' which indicates
- the relationship to file2. */
-
- int *val2_ptr; /* Pointer to the 'value' field in record on
- file2. This is the 'value' which indicates
- the relationship to file1. */
-
- int *val3_ptr; /* Pointer to the 'value' field in record on
- file3. */
-
-
-
- /*
- * Set up misc local variables.
- */
-
- if (p3_debug || p4_debug) {
- printf ("link_records: matchno: %d indices: %d %d\n",
- match_no, index1, index2);
- }
-
- file1_ptr = &files[curr_file[match_no]];
- file2_ptr = &files[corres_file[match_no]];
- file1_sub = value_sub[match_no];
- file2_sub = rev_value_sub[match_no];
-
- /*
- * Link the two records together.
- */
-
- val1_ptr = &(file1_ptr -> record[index1].value[file1_sub]);
- val2_ptr = &(file2_ptr -> record[index2].value[file2_sub]);
-
- hash_code = *val1_ptr;
- *val1_ptr = index2;
- *val2_ptr = index1;
-
- /*
- * If either of these two records are already linked to the third file,
- * connect these two record to the record in the third file.
- */
-
- other_val1_ptr =
- &(file1_ptr -> record[index1].value[other_sub (file1_sub)]);
- other_val2_ptr =
- &(file2_ptr -> record[index2].value[other_sub (file2_sub)]);
-
- if (is_hash_code (*other_val1_ptr)) {
- if (*other_val1_ptr != hash_code) {
- error ("hash code mis-match 1");
- }
- if (is_hash_code (*other_val2_ptr)) {
- if (*other_val2_ptr != hash_code) {
- error ("hash code mis-match 2");
- }
- return;
- } else {
- index3 = *other_val2_ptr;
- *other_val1_ptr = index3;
- }
- } else {
- index3 = *other_val1_ptr;
- if (is_hash_code (*other_val2_ptr)) {
- if (*other_val2_ptr != hash_code) {
- error ("hash code mis-match 3");
- }
- *other_val2_ptr = index3;
- } else {
- if (*other_val1_ptr != *other_val2_ptr) {
- /* error( "other file index mismatch 1" ) ; */
- /* In this error condition, just undo what
- we've already done */
- *val1_ptr = hash_code;
- *val2_ptr = hash_code;
- return;
- }
- }
- }
-
- /*
- * Connect the record in the third file to the record in the first file.
- */
- file3_ptr = &files[other_file[match_no]];
- file3_sub = other_value_sub[match_no];
- val3_ptr = &(file3_ptr -> record[index3].value[file3_sub]);
-
- if (is_hash_code (*val3_ptr)) {
- if (*val3_ptr != hash_code) {
- error ("hash code mis-match 4");
- }
- *val3_ptr = index1;
- } else {
- if (*val3_ptr != index1) {
- error ("other file index mismatch 2");
- }
- }
-
- /*
- * Connect the record in the third file to the record in the second file.
- */
-
- val3_ptr =
- &(file3_ptr -> record[index3].value[other_sub (file3_sub)]);
-
- if (is_hash_code (*val3_ptr)) {
- if (*val3_ptr != hash_code) {
- error ("hash code mis-match 5");
- }
- *val3_ptr = index2;
- } else {
- if (*val3_ptr != index2) {
- error ("other file index mismatch 3");
- }
- }
-
- }
/*
 * error: output fatal error message
 *
 * This routine writes the given text to stderr, prefixed by the program
 * name and followed by a period, and then terminates the program with
 * exit status 2.  It does not return to its caller.
 *
 * Return value:
 *      This procedure has no return value.
 */

void error (error_ptr)
char *error_ptr;                /* input */
                                /* Text of the message to output. */

{
    FILE *report = stderr;      /* where fatal messages are written */

    fprintf (report, "combine: %s.\n", error_ptr);

    exit (2);
}
/*
 * mem_alloc: allocate memory
 *
 * This routine uses the standard memory allocator, however, if memory
 * is not available, this routine outputs an error message and terminates.
 *
 * A negative size (for instance the result of an overflowed size
 * computation) is treated like an allocation failure.  A size of zero
 * yields a valid one byte block, because the standard allocator may
 * return a null pointer for an empty request even when memory is
 * available.
 *
 * Return value:
 *      This procedure returns a pointer to the allocated block.  It
 *      never returns a null pointer.
 */
char *mem_alloc (size)
int size;                       /* input */
                                /* Size (in bytes) of the block to allocate */

{

    char *block_ptr;            /* The newly allocated block */

    extern char *malloc ();

    if (size < 0) {
        error ("not enough memory -- files too big");
    }
    if (size == 0) {
        size = 1;
    }

    /*
     * malloc is declared above without a prototype, so the argument must
     * be given the type that malloc expects.
     */
    block_ptr = malloc ((size_t) size);
    if (block_ptr == 0) {
        error ("not enough memory -- files too big");
    }

    return (block_ptr);

}
-
- /*
- * reread_into_cache -- re-read a record from a file into a cache entry
- *
- * This routine is used to re-read a record (which has previously been
- * read) into a cache entry.
- */
- void reread_into_cache( file_ptr, index, cache_ptr )
- file_type * file_ptr; /* file to be read from */
- int index; /* record number to read */
- cache_entry_type * cache_ptr; /* cache entry to read into */
- {
- int status;
- char mbuffer[LINE_LENGTH];
-
- status = fseek (file_ptr->rnd_fd, file_ptr->record[index].rfa, 0);
- if ( status == -1 ) {
- (void) sprintf (mbuffer, "Disk error while seeking '%s'",
- file_ptr -> name_ptr);
- error (mbuffer);
- }
-
- status = read_into_cache(file_ptr->rnd_fd,
- file_ptr->record[index].rfa,
- cache_ptr);
-
- if (status < 0) {
- (void) sprintf (mbuffer, "Disk error while re-reading '%s'",
- file_ptr -> name_ptr);
- error (mbuffer);
- }
- }
-
- /*
- * read_into_cache -- read a record from a file into a cache entry
- *
- * Read a record into a cache entry. This routine reads an entire record
- * into the cache entry. If the currently allocated buffer is too small,
- * a larger buffer will be allocated.
- *
- * Return Value:
- * Byte count read (-1 for EOF)
- */
- int read_into_cache( fp, rfa, cache_ptr)
- FILE *fp; /* File to read */
- rfa_type rfa; /* rfa to read (already positioned) */
- cache_entry_type * cache_ptr; /* cache entry to read into */
- {
- char c;
- char *char_ptr;
- int status;
- int i;
-
- char_ptr = fgets (cache_ptr->recordp, cache_ptr->record_alen-sizeof(int), fp);
- if (char_ptr == NULL)
- return (-1);
-
- i = strlen (cache_ptr->recordp) - 1;
- if (cache_ptr->recordp[i] != '\n') {
- status = fseek (fp, rfa, 0);
- if ( status == -1 )
- error("Internal error: cannot reseek");
- for (i=0;;i++) {
- c = getc (fp);
- if (feof (fp)) {
- /* not (c==EOF) because of binary files */
- break;
- /* This is sort of a kludge, we only check for
- non-ascii if the record length is too long */
- } else if (!isascii (c) || c == '\0' ) {
- error ("non-ascii character in file");
- } else if (c == '\n') {
- break;
- }
- }
- i+=2; /* Leave room from newline and null byte */
- i+=sizeof(int); /* leave space at end for extra nulls for
- checksum algorithm */
- i += sizeof (int) - (i % sizeof (int));
-
- /*
- * Don't deallocate the old buffer since it was probably
- * allocated as a part of a larger buffer.
- */
- cache_ptr->recordp = mem_alloc(i);
- cache_ptr->record_alen = i;
-
- status = fseek (fp, rfa, 0);
- if ( status == -1 )
- error("Internal error: cannot reseek");
-
- char_ptr = fgets (cache_ptr->recordp, cache_ptr->record_alen-sizeof(int), fp);
- if (char_ptr == NULL)
- return (-1);
-
- i = strlen (cache_ptr->recordp) - 1;
- /* Perhaps we should warn about this */
- if (cache_ptr->recordp[i] != '\n')
- i++;
- }
- cache_ptr->recordp[i] = '\0';
- cache_ptr->record_length = i;
-
- return (i);
-
- }
-